# -*- coding: utf-8 -*-
import random
import scrapy
from scrapy import Request
from zc_core.dao.item_data_dao import ItemDataDao
from cpeinet.rules import *
from zc_core.dao.batch_dao import BatchDao
from zc_core.util.batch_gen import time_to_batch_no
from zc_core.util.done_filter import DoneFilter
from datetime import datetime
from zc_core.util.http_util import *
from zc_core.spiders.base import BaseSpider


class FullSpider(BaseSpider):
    """Full-catalog item spider.

    Re-crawls item detail pages for SKUs in the current batch that are
    still missing a level-2 catalog name (``catalog2Name``), handing each
    response to the ``full_catalog`` rule for extraction.
    """

    name = 'full_catalog'

    # Item detail page URL template, keyed by goods id.
    item_url = "http://emall.cpeinet.com.cn/visitor/goodsBuyer/goodsdetails.do?goodId={}"

    def __init__(self, batchNo=None, *args, **kwargs):
        """Initialize the spider for a batch.

        :param batchNo: batch identifier forwarded to ``BaseSpider``;
            ``self.batch_no`` is expected to be set by the base class.
        """
        super(FullSpider, self).__init__(batchNo=batchNo, *args, **kwargs)
        # Create the batch record.
        BatchDao().create_batch(self.batch_no)
        # Intended to avoid duplicate collection.
        # NOTE(review): done_filter is created but never consulted in
        # start_requests — confirm whether already-done SKUs should be
        # skipped before yielding requests.
        self.done_filter = DoneFilter(self.batch_no)

    def start_requests(self):
        """Yield detail-page requests for every SKU lacking catalog2Name."""
        # Fetch only the fields needed to build the detail request.
        pool_list = ItemDataDao().get_batch_data_list(self.batch_no, query={
            "catalog2Name": {"$exists": False}
        }, fields={"_id": 1, "catalog3Id": 1, "batchNo": 1})
        # Lazy %-args: message is only formatted if INFO logging is enabled.
        self.logger.info('全量：%s', len(pool_list))
        # Shuffle so requests are not clustered by catalog/insert order.
        random.shuffle(pool_list)
        for sku in pool_list:
            _id = sku.get('_id')
            catalog3_id = sku.get('catalog3Id')
            # Request the item detail page; context travels via meta.
            yield Request(
                url=self.item_url.format(_id),
                meta={
                    'reqType': 'item',
                    'skuId': _id,
                    'batchNo': self.batch_no,
                    'catalog3Id': catalog3_id,
                },
                callback=self.parse_item_data,
                errback=self.error_back
            )

    def parse_item_data(self, response):
        """Parse an item detail response and yield the extracted item.

        Delegates extraction to the ``full_catalog`` rule; yields nothing
        when the rule returns a falsy result.
        """
        data = full_catalog(response)
        if data:
            self.logger.info('商品: [%s]', data.get('skuId'))
            yield data
